*Table error




  * Load the land-area dataset from the directory held in the global macro
  * FA_datasets (must be defined before this file runs); the clear option
  * discards whatever data are currently in memory.
  use "$FA_datasets/data_landarea", clear

 
*how many GPS observations are missing in the original dataset
// gen hasGPS=0 if parcel_size_farmer!=. &   parcel_size_farmer!=0
// recode hasGPS 0=1 if parcel_size_gps!=. & parcel_size_gps!=0 
// tab hasGPS

// hasGPS	Freq.	Percent	Cum.
//			
// 0	10,517	51.49	51.49
// 1	9,907	48.51	100.00
//			
// Total	20,424	100.00
* 20,424 parcel-survey rounds; 48% have GPS data, and the analysis focuses on this subsample

* Keep only rows where both the GPS-measured and the farmer-reported area are
* strictly positive and non-missing.
* The test is written as x > 0 & x < . rather than x != . & x != 0 because
* Stata orders every missing code (., .a, ..., .z) above all real numbers:
* the earlier form let extended missing codes and negative areas through,
* which then gave undefined logs and distorted the household totals.
keep if parcel_size_gps > 0 & parcel_size_gps < .
keep if parcel_size_farmer > 0 & parcel_size_farmer < .


* Construct farm size at the household level: total parcel area within each
* household (hhid) and survey round, from both the farmer-reported and the
* GPS-measured parcel sizes.
cap drop firstobs

* total() is the documented egen function; sum() is its legacy, undocumented
* name. Both treat missing parcel sizes as zero when adding up.
bysort hhid surveyround: egen farm_size_farmer = total(parcel_size_farmer)
bysort hhid surveyround: egen farm_size_gps = total(parcel_size_gps)

* Running index within household-round. Rows with firstobs == 1 are used
* later to count each farm exactly once in farm-level statistics.
bysort hhid surveyround: gen firstobs = _n

* Natural logs of the farm-level and parcel-level areas, created in the order
* farm (farmer, gps) then parcel (farmer, gps).
foreach size in farm_size_farmer farm_size_gps parcel_size_farmer parcel_size_gps {
    gen ln_`size' = ln(`size')
}

 
     * Definition 1: relative error = (self-reported - GPS) / GPS.
* The farm-level version is filled only on the firstobs == 1 row of each
* household-round; the parcel-level version is filled on every row.
generate error_rel_farm = (farm_size_farmer - farm_size_gps) / farm_size_gps if firstobs == 1
generate error_rel_parcel = (parcel_size_farmer - parcel_size_gps) / parcel_size_gps

* Definition 2: log error = ln(self-reported) - ln(GPS),
* as in Abay et al. (2020).
generate ln_error_farm = ln_farm_size_farmer - ln_farm_size_gps if firstobs == 1
generate ln_error_parcel = ln_parcel_size_farmer - ln_parcel_size_gps
  
  
   
* Number of observations: parcel level first, then farm level
* (farm level restricted to the firstobs == 1 row of each household-round).
summarize ln_parcel_size_gps
summarize ln_farm_size_gps if firstobs == 1
  
 
 
 
  * Trim the 1% tails: each error measure is set to missing wherever it lies
* below its own 1st percentile or above its own 99th percentile. Percentiles
* are taken from the detailed summary of that same variable, so every measure
* is trimmed independently of the others.
foreach err in error_rel_parcel error_rel_farm ln_error_parcel ln_error_farm {
    summarize `err', detail
    replace `err' = . if `err' < r(p1) | `err' > r(p99)
}
 

 
 
  *A. program to calculate difference of relative bias  
    *--------------------------------------
	
 cap program drop my_error_mean

* my_error_mean: r-class helper meant to be called by bootstrap.
* Reads error_rel_parcel, error_rel_farm and firstobs from the data in memory.
* Returns:
*   r(mean_error_parcel)  mean of error_rel_parcel over all rows
*   r(mean_error_farm)    mean of error_rel_farm over rows with firstobs == 1
*   r(mean_error_diff)    parcel mean minus farm mean
program define my_error_mean, rclass
    tempname parcel_mean farm_mean

    // The means are held in temporary scalars rather than local macros.
    // A macro stores the number as text, which can lose precision, and when
    // a sample has no usable rows r(mean) is undefined, so expanding it
    // into a macro assignment is a syntax error. As a scalar it is simply
    // missing, and the returned difference is missing too.
    quietly summarize error_rel_parcel
    scalar `parcel_mean' = r(mean)

    quietly summarize error_rel_farm if firstobs == 1
    scalar `farm_mean' = r(mean)

    return scalar mean_error_parcel = `parcel_mean'
    return scalar mean_error_farm = `farm_mean'
    return scalar mean_error_diff = `parcel_mean' - `farm_mean'
end
 
  
 

 
 
  *B. program to calculate relative variance of ln error as suggested by referee
    *--------------------------------------
	
 cap program drop my_var_error

* my_var_error: r-class helper meant to be called by bootstrap.
* Reads ln_error_parcel, ln_parcel_size_gps, ln_error_farm, ln_farm_size_gps
* and firstobs from the data in memory.
* Returns:
*   r(var_error_parcel)  Var(ln_error_parcel) / Var(ln_parcel_size_gps)
*   r(var_error_farm)    Var(ln_error_farm) / Var(ln_farm_size_gps),
*                        using rows with firstobs == 1 only
*   r(var_error_diff)    parcel ratio minus farm ratio
* In each ratio the denominator is computed on the rows where the matching
* log error is non-missing, so both variances use the same sample.
program define my_var_error, rclass
    tempname err_var parcel_ratio farm_ratio

    // Variances are kept in temporary scalars rather than local macros:
    // a macro stores the number as text, which can lose precision, and an
    // undefined r(Var) (no usable rows) would expand to nothing and break
    // the assignment. As a scalar it is missing and the ratio is missing.

    // Parcel level.
    quietly summarize ln_error_parcel
    scalar `err_var' = r(Var)
    quietly summarize ln_parcel_size_gps if ln_error_parcel != .
    scalar `parcel_ratio' = `err_var' / r(Var)

    // Farm level: one row per household-round.
    quietly summarize ln_error_farm if firstobs == 1
    scalar `err_var' = r(Var)
    quietly summarize ln_farm_size_gps if ln_error_farm != . & firstobs == 1
    scalar `farm_ratio' = `err_var' / r(Var)

    return scalar var_error_parcel = `parcel_ratio'
    return scalar var_error_farm = `farm_ratio'
    return scalar var_error_diff = `parcel_ratio' - `farm_ratio'
end


 

  
    * Mean relative error at parcel and farm level and their difference,
* each reported with its bootstrap SE and p-value (100 replications).
* NOTE(review): no seed() option or set seed is visible in this section, so
* results change between runs unless a seed is set upstream - confirm.
* NOTE(review): resampling is over individual rows, not households - confirm
* this is intended given that firstobs was assigned before resampling.
bootstrap r(mean_error_parcel) r(mean_error_farm) r(mean_error_diff), ///
    reps(100): my_error_mean

* Variance ratio of the log error at parcel and farm level and their
* difference, each reported with its bootstrap SE and p-value
* (100 replications).
bootstrap r(var_error_parcel) r(var_error_farm) r(var_error_diff), ///
    reps(100): my_var_error

  

 
 
 
 
 
 
 
  
 

  